# Checking against aggregate production
library(pacman)
p_load(
  here, fst, readxl, data.table 
)

# Reading Mark's results 
prod_hour_mark = read_xlsx(
  here('StructuralCode/PredictedAggProductionbyHour.xlsx'),
  col_names = 'pred_prod.m'
)
prod_unit_mark = read_xlsx(
  here('StructuralCode/PredictedYearlyProductionbyGenerator.xlsx'),
  col_names = 'pred_prod.m'
)
prod_unit_mark_v2 = read_xlsx(
  here('StructuralCode/PredictedYearlyOutput_v2.xlsx'),
  col_names = c('orispl_code','unitid','namepcap','pred_prod.m2'),
  skip = 1
) 

# My calcs of results 
elec_gen_dt = 
  read.fst(
    path = here("Data/electricity-generation/unit-model-fit/elec-gen-fit-dt.fst"),
    as.data.table = TRUE
  )

prod_hour = elec_gen_dt[,.(
  actual_gload = sum(gload_mwh),
  pred_prod.e = sum(fit_gload_mwh)), 
  keyby = utc_time
] |> cbind(prod_hour_mark)
prod_unit = elec_gen_dt[,.(
  actual_gload = sum(gload_mwh),
  pred_prod.e = sum(fit_gload_mwh)), 
  keyby = .(orispl_code, unitid)
] |> cbind(prod_unit_mark) |> cbind(pred_prod.m2 = prod_unit_mark_v2$pred_prod.m2)

# Plotting time series 
p_load(ggplot2)
theme_set(theme_minimal())
ggplot(data = prod_hour, aes(x = utc_time)) + 
  geom_line(aes(y = pred_prod.e))+ 
  geom_line(aes(y = pred_prod.m), color = 'red')
ggplot(data = prod_hour, aes(x = pred_prod.m - pred_prod.e)) + 
  geom_histogram(bins = 100) + 
  geom_vline(xintercept = prod_hour[,median(pred_prod.m - pred_prod.e)], linetype = 'dashed')+
  labs(x = 'Difference', y = 'Count (of hours)')
ggplot(data = prod_unit, aes(x = pred_prod.m2 - pred_prod.e)) + 
  geom_histogram(bins = 100) + 
  geom_vline(xintercept = prod_unit[,median(pred_prod.m2 - pred_prod.e)], linetype = 'dashed')+
  labs(x = 'Difference', y = 'Count (of units)')

ggplot(data = prod_unit, aes(x = pred_prod.e)) + 
  geom_point(aes(y = pred_prod.m2), color = 'black', alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
  labs(x = 'Emmett Predicted Production', y = 'Mark Predicted Production')
ggplot(data = prod_hour, aes(x = pred_prod.e)) +
  geom_point(aes(y = pred_prod.m), color = 'black' , alpha = 0.15) +
  geom_abline(slope = 1, intercept = 0, linetype = 'dashed') +
  labs(x = 'Emmett Predicted Production', y = 'Mark Predicted Production')

prod_unit[pred_prod.m2 - pred_prod.e > 1e5]


# Manually calculating again 
good_model_dt = 
  read.fst(
    path = here("Data/electricity-generation/unit-model-fit/good-model-dt.fst"),
    as.data.table = TRUE
  )[,.(
    orispl_code, unitid, 
    intercept,
    excess_load_cal,excess_load_cal_sq,
    excess_load_mro,excess_load_mro_sq,
    excess_load_npcc,excess_load_npcc_sq,
    excess_load_rfc,excess_load_rfc_sq,
    excess_load_serc,excess_load_serc_sq,
    excess_load_tre,excess_load_tre_sq,
    excess_load_wecc,excess_load_wecc_sq,
    log_scale, est_extremum,
    loglik, loglik_df
  )][orispl_code == 2390 & unitid == '014001'] |>
  setkey(orispl_code, unitid) 
# Loading the electricity generation data
elec_gen_dt=
  read.fst(
    path = here("Data/electricity-generation/elec-gen-dt.fst"),
    as.data.table = TRUE
  )[orispl_code == 2390 & unitid == '014001']|> 
  setkey(orispl_code, unitid, utc_time) 
elec_gen_dt[,
  fit_gload_raw_manual := good_model_dt$intercept + 
    good_model_dt$excess_load_rfc*excess_load_rfc+ 
    good_model_dt$excess_load_rfc_sq*excess_load_rfc_sq
]
# Drawing epsilons 
elec_gen_dt[,epsilon := rnorm(8760, mean = 0, sd = exp(good_model_dt$log_scale))]
# Calculating final fitted value
elec_gen_dt[,fit_gload_manual := fcase(
  fit_gload_raw_manual + epsilon < namepcap & fit_gload_raw_manual + epsilon > 0,
    fit_gload_raw_manual + epsilon,
  fit_gload_raw_manual + epsilon >= namepcap, 
    namepcap,
  fit_gload_raw_manual + epsilon <= 0, 
    0
)]

elec_gen_dt[,.(actual_gload = sum(gload_mwh), fit_gload = sum(fit_gload_manual))]

write.csv(
  elec_gen_dt[,.(
    utc_time, orispl_code, unitid, 
    gload_mwh, fit_gload_mwh = fit_gload_manual,
    fit_gload_mwh_raw = fit_gload_raw_manual, epsilon
  )],
  here('Data/electricity-generation/output/unit-prediction-debug.csv')
)



